
import re
from collections import Counter


def is_alphabet(uchar):
    """Return True when ``uchar`` falls in the ASCII ranges A-Z or a-z.

    The test is an ordinary string comparison against the range bounds,
    so for a single character it answers "is this an ASCII letter?".
    Longer strings are compared lexicographically against the same
    bounds, and the empty string yields False.
    """
    in_upper_range = "A" <= uchar <= "Z"
    in_lower_range = "a" <= uchar <= "z"
    return in_upper_range or in_lower_range


def read02(src_path="hamlet.txt", dst_path="hamlet_单词频度.txt", top_n=10,
           encoding=None):
    """Count word frequencies in a text file and save the most common words.

    The text is lower-cased and split on spaces, full-width commas (``，``),
    hyphens and newlines. Every character that is not an ASCII letter is
    then removed from each token (so ``don't`` is counted as ``dont``) and
    tokens that end up empty are discarded.

    The complete word list and the complete frequency ranking are printed
    to stdout. The ``top_n`` most frequent ``(word, count)`` tuples are
    written to ``dst_path``, one ``str(tuple)`` per line. Words with equal
    counts keep the order in which they first appear in the text.

    Args:
        src_path: Text file to analyse.
        dst_path: File that receives the ranking; it is overwritten.
        top_n: Maximum number of entries to write. Fewer lines are written
            when the text contains fewer distinct words.
        encoding: Encoding used to read ``src_path``. ``None`` keeps the
            default chosen by ``open``.

    Raises:
        ValueError: If ``top_n`` is negative.
        OSError: If ``src_path`` cannot be read or ``dst_path`` cannot be
            written.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    with open(src_path, "r", encoding=encoding) as src:
        text = src.read().lower()

    # Raw string keeps the historical delimiter set without the invalid
    # "\|" string escape. The fourth alternative is a literal "|" followed
    # by a carriage return.
    tokens = re.split(r' |，|-|\|\r|\n', text)

    # Strip everything except ASCII letters from each token in one pass
    # (the same ranges is_alphabet() accepts), then drop empty tokens.
    drop_non_letters = re.compile(r'[^A-Za-z]').sub
    cleaned = (drop_non_letters('', token) for token in tokens)
    list_hamlet = [word for word in cleaned if word]
    print(list_hamlet)

    # most_common() sorts by descending count and is stable for ties.
    hamlet_word_sort = Counter(list_hamlet).most_common()
    print(hamlet_word_sort)

    with open(dst_path, "w") as dst:
        # Slicing (instead of indexing 0..9) tolerates short rankings.
        # Each tuple is converted to its string form before writing.
        dst.writelines(str(entry) + '\n' for entry in hamlet_word_sort[:top_n])


# Run the word-frequency analysis at module level; the call is unguarded,
# so it executes whenever this file is run or imported.
read02()
